## packages: remove or add your necessary packages

required_packages <- c("tidyverse", "readxl", "ggthemes", "hrbrthemes", "extrafont", "plotly", "scales", "stringr", "gganimate", "here", "tidytext", "sentimentr", "scales", "DT", "here", "sm", "mblm", "glue", "fs", "knitr", "rmdformats", "janitor", "urltools", "colorspace", "pdftools")

library(ggplot2)    # CRAN v3.3.6
library(colorspace) # CRAN v2.0-3
library(here)       # CRAN v1.0.1
library(dplyr)      # CRAN v1.0.10
library(janitor)    # CRAN v2.1.0
library(gt)         # CRAN v0.5.0
library(tidyr)      # CRAN v1.2.1
library(readr)      # CRAN v2.1.3
library(stringr)    # CRAN v1.4.1
library(tidytext)
library(ggalt)
library(forcats)
library(lubridate)
library(ggforce)

# for(i in required_packages) { 
# if(!require(i, character.only = T)) {
# 
# #  if package is not existing, install then load the package
# install.packages(i, dependencies = T)
# require(i, character.only = T)
# }
# }


## save plots?
save <- TRUE
#save <- FALSE

## quality of png's
dpi <- 750

## font adjust; please adjust to client´s website
#extrafont::loadfonts(device = "win", quiet = TRUE)
#font_add_google("Montserrat", "Montserrat")
# font_add_google("Overpass", "Overpass")
# font_add_google("Overpass Mono", "Overpass Mono")



## theme updates; please adjust to client´s website
#theme_set(ggthemes::theme_clean(base_size = 15))
theme_set(ggthemes::theme_clean(base_size = 15, base_family = "Montserrat"))


theme_update(plot.margin = margin(30, 30, 30, 30),
             plot.background = element_rect(color = "white",
                                            fill = "white"),
             plot.title = element_text(size = 20,
                                       face = "bold",
                                       lineheight = 1.05,
                                       hjust = .5,
                                       margin = margin(10, 0, 25, 0)),
             plot.title.position = "plot",
             plot.caption = element_text(color = "grey40",
                                         size = 9,
                                         margin = margin(20, 0, -20, 0)),
             plot.caption.position = "plot",
             axis.line.x = element_line(color = "black",
                                        size = .8),
             axis.line.y = element_line(color = "black",
                                        size = .8),
             axis.title.x = element_text(size = 16,
                                         face = "bold",
                                         margin = margin(t = 20)),
             axis.title.y = element_text(size = 16,
                                         face = "bold",
                                         margin = margin(r = 20)),
             axis.text = element_text(size = 11,
                                      color = "black",
                                      face = "bold"),
             axis.text.x = element_text(margin = margin(t = 10)),
             axis.text.y = element_text(margin = margin(r = 10)),
             axis.ticks = element_blank(),
             panel.grid.major.x = element_line(size = .6,
                                               color = "#eaeaea",
                                               linetype = "solid"),
             panel.grid.major.y = element_line(size = .6,
                                               color = "#eaeaea",
                                               linetype = "solid"),
             panel.grid.minor.x = element_line(size = .6,
                                               color = "#eaeaea",
                                               linetype = "solid"),
             panel.grid.minor.y = element_blank(),
             panel.spacing.x = unit(4, "lines"),
             panel.spacing.y = unit(2, "lines"),
             legend.position = "top",
             legend.title = element_text(family = "Montserrat",
                                         color = "black",
                                         size = 14,
                                         margin = margin(5, 0, 5, 0)),
             legend.text = element_text(family = "Montserrat",
                                        color = "black",
                                        size = 11,
                                        margin = margin(4.5, 4.5, 4.5, 4.5)),
             legend.background = element_rect(fill = NA,
                                              color = NA),
             legend.key = element_rect(color = NA, fill = NA),
             #legend.key.width = unit(5, "lines"),
             #legend.spacing.x = unit(.05, "pt"),
             #legend.spacing.y = unit(.55, "pt"),
             #legend.margin = margin(0, 0, 10, 0),
             strip.text = element_text(face = "bold",
                                       margin = margin(b = 10)))

## theme settings for flipped plots
theme_flip <-
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_line(size = .6,
                                          color = "#eaeaea"))

## theme settings for maps
theme_map <- 
  theme_void(base_family = "Montserrat") +
  theme(legend.direction = "horizontal",
        legend.box = "horizontal",
        legend.margin = margin(10, 10, 10, 10),
        legend.title = element_text(size = 17, 
                                    face = "bold"),
        legend.text = element_text(color = "grey33",
                                   size = 12),
        plot.margin = margin(15, 5, 15, 5),
        plot.title = element_text(face = "bold",
                                  size = 20,
                                  hjust = .5,
                                  margin = margin(30, 0, 10, 0)),
        plot.subtitle = element_text(face = "bold",
                                     color = "grey33",
                                     size = 17,
                                     hjust = .5,
                                     margin = margin(10, 0, -30, 0)),
        plot.caption = element_text(size = 14,
                                    color = "grey33",
                                    hjust = .97,
                                    margin = margin(-30, 0, 0, 0)))

## numeric format for labels
num_format <- scales::format_format(big.mark = ",", small.mark = ",", scientific = F)

## main color backlinko
bl_col <- "#00d188"
bl_dark <- darken(bl_col, .3, space = "HLS")

## colors + labels for interval stripes
int_cols <- c("#bce2d5", "#79d8b6", bl_col, "#009f66", "#006c45", "#003925")
int_perc <- c("100%", "95%", "75%", "50%", "25%", "5%")

## colors for degrees (Bachelors, Massters, Doctorate in reverse order)
cols_degree <- c("#e64500", "#FFCC00", darken(bl_col, .1))

## gradient colors for position
colfunc <- colorRampPalette(c(bl_col, "#bce2d5"))
pos_cols <- colfunc(10)

Load data

youtube_data <- read_csv(here("proc_data","youtube_data_proc.csv"))
youtube_data_activities <- read_csv(here("proc_data","youtube_data_activities_proc.csv"))
tiktok_data <-  read_csv(here("proc_data","tiktok_data_proc.csv"))
tiktok_data_activities <-  read_csv(here("proc_data","tiktok_data_activities_proc.csv"))

Calculate general stats

yt_vids <- youtube_data %>% distinct(yt_video_id) %>% nrow()
tt_vids <- tiktok_data %>% distinct(tt_video_id) %>% nrow()
youtube_data_activities %>% group_by(yt_video_id) %>% summarise(idn=max(idea)) %>% 
   pull(idn) %>% {length(which(.>1))}-> mult_ideas_yt
tiktok_data_activities %>% group_by(tt_video_id) %>% summarise(idn=max(idea)) %>% 
   pull(idn) %>% {length(which(.>1))} -> mult_ideas_tt

meanytlength <- youtube_data$video_length %>% summary %>% {./60}
meanttlength <- tiktok_data$video_meta_duration %>% summary 

General stats

YouTube: 177 videos (unique video url identifiers, includes YT shorts)
TikTok: 177 videos

YouTube videos are longer (12.15141 minutes on average for the sampled videos), so approximately one third of the videos examined (53/177) included >1 money-making idea. TikTok videos have a shorter maximum length (3 to 10 minutes; 41.14384 seconds on average for the sampled videos) so videos on this platform tend to feature a single idea. Only 4 of the 145 TikTok videos examined provided more than one money-making idea.

Publication dates

youtube_data <- youtube_data %>% mutate(month=month(ymd(youtube_data$publish_date)),
                        pyear=year(ymd(youtube_data$publish_date))) %>% 
  mutate(pub_date=ymd(publish_date))


tiktok_data <- tiktok_data %>% mutate(month=month(ymd_hms(tiktok_data$create_time_iso)),                                pyear=year(ymd_hms(tiktok_data$create_time_iso))) %>%
  mutate(pub_date=date(ymd_hms(create_time_iso))) 

3/4 of the YouTube videos examined were published in 2022, and across all the videos sampled (published since 2018), most are from the summer/fall season (Northern Hemisphere).

TikTok videos in the sample were published between 2019-2022, with more videos uploaded with each passing year. The month with most uploads is July.

tiktok_data %>% tabyl(pyear) %>% round(2)
##  pyear  n percent
##   2019  4    0.03
##   2020 29    0.20
##   2021 50    0.34
##   2022 63    0.43

Publication month also varied between platforms.

youtube_data %>% count(month) %>% 
  ggplot()+
  geom_bar(aes(x=month,y=n),stat = "identity")+
  scale_x_discrete(limits=month.abb) +labs(subtitle = "YouTube data")

tiktok_data %>% count(month) %>% 
  ggplot()+
  geom_bar(aes(x=month,y=n),stat = "identity")+
  scale_x_discrete(limits=month.abb) +labs(subtitle = "TikTok data")

Considering publication dates, videos published earlier do not tend to accumulate more views and comments over time. Engagement is also mostly unrelated to subscriber/follower counts and thus possibly related to content.

ttdatevc <- tiktok_data %>% select(source,pub_date,
                       comments=comment_count,
                       views=play_count,
                       followers=author_meta_fans)
ytdatevc <- youtube_data %>% select(source,pub_date,
                       comments=comments,
                       views=view_count,
                       followers=subs_numeric)
dates_views_comments <- bind_rows(ttdatevc,ytdatevc)

ggplot(dates_views_comments)+
  geom_point(aes(x=pub_date,y=views,color=source))+
  labs(x="Publication date")

ggplot(dates_views_comments)+
  geom_point(aes(x=pub_date,y=comments,color=source))+
    labs(x="Publication date")

ggplot(dates_views_comments)+
  geom_point(aes(x=views,y=comments,color=source))

ggplot(dates_views_comments)+
  geom_point(aes(x=followers,y=comments,color=source))

dates_views_comments %>% filter(followers!=44100000) %>% 
ggplot()+
  geom_point(aes(x=followers,y=comments,color=source))+
  labs(subtitle = "removed outlier")

ggplot(dates_views_comments)+
  geom_point(aes(x=followers,y=views,color=source))

dates_views_comments %>% filter(followers!=44100000) %>% 
ggplot()+
  geom_point(aes(x=followers,y=views,color=source))+
  labs(subtitle = "removed outlier")

Presenter demographics

yt_presenter_demog_gend <- youtube_data %>% tabyl(presenter_gender) %>% 
  mutate(valid_percent=round(valid_percent,2))
yt_malepct <- yt_presenter_demog_gend$valid_percent[2] 
tt_presenter_demog_gend <- tiktok_data %>% tabyl(presenter_gender) %>% 
  mutate(valid_percent=round(valid_percent,2))
tt_malepct <- tt_presenter_demog_gend$valid_percent[2] 
yt_ages <- youtube_data %>% tabyl(presenter_age) %>% na.omit() %>% select(-percent) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent)
tt_ages <- tiktok_data %>% tabyl(presenter_age) %>% na.omit() %>% select(-percent) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent)

Male presenters were represented more on both platforms (YouTube: 0.86% and TikTok 0.8%), and the 20-30 y.o. age category had the highest proportion with ~40%.

gt(yt_ages) %>% tab_header("YouTube")
YouTube
presenter_age n percent
10 - 20 6 0.03
20 - 30 76 0.43
30 - 40 49 0.28
40 - 50 4 0.02
50+ 3 0.02
Voice-over 20 0.11
Voice-over Text-to-Speech 18 0.10
gt(tt_ages) %>% tab_header("TikTok")
TikTok
presenter_age n percent
10 - 20 14 0.10
20 - 30 57 0.40
30 - 40 24 0.17
40 - 50 5 0.04
50+ 2 0.01
Music 8 0.06
Voice-over 6 0.04
Voice-over Text-to-Speech 25 0.18

Categories

YouTube videos, as categorized by their authors, varied in assignment despite the similar overarching topic.

The most common category was Education, followed by How-to % Stlye, and then all the others.

youtube_data %>% tabyl(category) %>% arrange(-n) %>% 
  mutate(across(where(is.numeric),round,2)) %>% gt() %>% tab_header(title = "YouTube data")
YouTube data
category n percent
Education 96 0.54
Howto & Style 46 0.26
People & Blogs 27 0.15
Entertainment 7 0.04
News & Politics 1 0.01

Earnings data

ytearn <- 
youtube_data_activities %>% 
  mutate(earnings_1_unit=earnings/earnings_timeframe_number) %>% 
  group_by(yt_video_id,idea,earnings_timeframe) %>% 
  summarise(earn=mean(earnings_1_unit,na.rm=TRUE)) %>% ungroup() %>% na.omit() %>% 
  filter(earnings_timeframe!="No timeframe provided")
ttearn <- 
  tiktok_data_activities %>% 
  mutate(earnings_1_unit=earnings/earnings_timeframe_number) %>% 
  group_by(tt_video_id,idea,earnings_timeframe) %>% 
  summarise(earn=mean(earnings_1_unit,na.rm=TRUE)) %>% ungroup() %>% na.omit() %>% 
  filter(earnings_timeframe!="No timeframe provided")

# earnings time frames
earn_tf <- bind_rows(ytearn,ttearn) %>% count(earnings_timeframe) %>% arrange(-n)

earnings_by_tf <- 
bind_rows(ytearn,ttearn) %>% group_by(earnings_timeframe) %>% 
  summarize(median_earn=median(earn),
            min_earn=min(earn),max_earn=max(earn),
            sd_earn=sd(earn,na.rm = TRUE)) %>% arrange(-median_earn)
  • The most common time frame for earnings was for daily income, followed by months and hours.
gt(earn_tf)
earnings_timeframe n
Days 67
Months 65
Hours 56
Minutes 20
Weeks 9
Years 5
Per Post 1
  • Longer time frames report higher median earnings (per 1 unit of time, not standardized)
gt(earnings_by_tf)%>% 
  fmt_number(
    columns = -earnings_timeframe,  
    decimals = 1,
    use_seps = FALSE
  )
earnings_timeframe median_earn min_earn max_earn sd_earn
Years 100000.0 500.0 400000.0 155449.2
Months 5000.0 15.0 300000.0 50262.5
Weeks 1050.0 24.0 14000.0 4357.9
Days 500.0 2.6 7000.0 1332.4
Hours 35.5 3.0 487.9 110.4
Per Post 33.3 33.3 33.3 NA
Minutes 5.1 0.1 30.0 6.7

Standardized earnings

temporal_earn <- c("Days","Hours","Minutes","Months","Weeks","Years")

yt_tempearn <- ytearn %>% filter(earnings_timeframe %in% temporal_earn)
tt_tempearn <- ttearn %>% filter(earnings_timeframe %in% temporal_earn)

yt_hourly_earn <- 
yt_tempearn %>% mutate(hourly_earn=case_when(
  earnings_timeframe=="Hours"~earn,
  earnings_timeframe=="Minutes"~earn*60,
  earnings_timeframe=="Days"~earn/8,
  earnings_timeframe=="Weeks"~earn/40,
  earnings_timeframe=="Months"~earn/200,
  earnings_timeframe=="Years"~earn/2400
)) %>% mutate(source="YouTube")

tt_hourly_earn <- 
  tt_tempearn %>% mutate(hourly_earn=case_when(
    earnings_timeframe=="Hours"~earn,
    earnings_timeframe=="Minutes"~earn*60,
    earnings_timeframe=="Days"~earn/8,
    earnings_timeframe=="Weeks"~earn/40,
    earnings_timeframe=="Months"~earn/200,
    earnings_timeframe=="Years"~earn/2400
  )) %>% mutate(source="TikTok")

all_earn <- bind_rows(yt_hourly_earn,tt_hourly_earn) 

tt_h_earnsum <- 
tt_hourly_earn %>% summarize(mean_earn=mean(hourly_earn),
                             median_earn=median(hourly_earn))
yt_h_earnsum <- 
yt_hourly_earn %>% summarize(mean_earn=mean(hourly_earn),
                             median_earn=median(hourly_earn))

hourly_med <- median(all_earn$hourly_earn)


bind_rows(yt_hourly_earn,tt_hourly_earn) %>% 
  ggplot()+
  geom_histogram(aes(hourly_earn,fill=source),color="black",alpha=0.5)

For videos that report earnings associated with a temporal reference ($ earned per unit of time), earnings can be reported in a common unit by assuming 8 hour work days and 5 day work weeks. The median hourly earnings is 50.

Once standardized, the timeframe with the highest median earnings was minutes. For this time frame, 18 of 20 videos suggest that it is possible to make between 1 and 30 USD per minute with their ideas. This becomes up to $1800 per hour.

all_earn %>% group_by(earnings_timeframe) %>% 
  summarize(median_earn=median(hourly_earn),
            min_earn=min(hourly_earn),max_earn=max(hourly_earn),
            sd_earn=sd(hourly_earn,na.rm = TRUE)) %>% arrange(-median_earn) %>% gt() %>% 
  fmt_number(
    columns = -earnings_timeframe,  
    decimals = 1,
    use_seps = FALSE
  )
earnings_timeframe median_earn min_earn max_earn sd_earn
Minutes 306.0 3.0 1800.0 403.6
Days 62.5 0.3 875.0 166.6
Years 41.7 0.2 166.7 64.8
Hours 35.5 3.0 487.9 110.4
Weeks 26.2 0.6 350.0 108.9
Months 25.0 0.1 1500.0 251.3

Across all videos, earnings are right-skewed. 90% of videos report hourly earnings < 384.138.

std hourly earnings varied by platform

This distribution is also evident within earnings timeframes.

bind_rows(yt_hourly_earn,tt_hourly_earn) %>% 
  ggplot()+
  geom_histogram(aes(earn))+
  facet_wrap(~earnings_timeframe,scales = 'free')

Earnings by category (YouTube)

The more common categories (Education, Howto & Style) did not report the higher mean or median standardized earnings. Instead, the People and Blogs category and Entertainment had the top two positions.

yt_hourlycorrs <- left_join(yt_hourly_earn,youtube_data_activities)

yt_hourlycorrs_chp <- yt_hourlycorrs  %>% group_by(yt_video_id,idea) %>%
  chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()
yt_hourlycorrs_chp %>% 
  group_by(category) %>% summarise(mean_earn=mean(hourly_earn),
                                   med_earn=median(hourly_earn)) %>% 
  arrange(-mean_earn) %>% gt()
category mean_earn med_earn
People & Blogs 386.75750 168.75
Entertainment 127.05000 125.00
Howto & Style 105.40231 80.00
Education 98.01758 40.00
yt_hourlycorrs_chp %>% 
  ggplot(aes(x=category,y=hourly_earn,color=category))+
  geom_sina() + scale_color_discrete(guide="none")

Business types and activities

YouTube - Level 1

For all YouTube videos, the predominant Business Type for the money-making ideas was Publication, Media, and Blogs, followed by the Service Business. Other business types were less common.

# without earnings
yt_acts_chp <- youtube_data_activities  %>% group_by(yt_video_id,idea) %>%
  chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()

yt_bus1 <- youtube_data_activities  %>% group_by(yt_video_id,idea) %>%
  unchop(business_type_level_1) %>%  ungroup()

bus1ct <- yt_bus1 %>% group_by(yt_video_id,idea) %>% 
  distinct(yt_video_id,idea,business_type_level_1) %>% 
  tabyl(business_type_level_1) %>% arrange(-n)

bus1ct %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% 
  gt() %>% tab_header("YouTube",subtitle = "Business Types, all videos") 
YouTube
Business Types, all videos
business_type_level_1 n percent
Publication, Media & Blog 175 0.45
Service Business 122 0.31
Ecommerce & Consumer 56 0.14
Investing 27 0.07
Software & Tech 9 0.02


For videos and ideas with reported earnings, the business activity with the highest earnings (standardized) was Publication, Media, & Blog, followed by investing.

# with earninings

yt_hourlycorrs_bus1 <- yt_hourlycorrs  %>% group_by(yt_video_id,idea) %>%
unchop(business_type_level_1) %>%  ungroup()

yt_hourlycorrs_bus1 %>% group_by(yt_video_id,idea) %>% 
  distinct(yt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup() %>% 
  group_by(business_type_level_1) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-median_earn) %>% gt() %>% tab_header("YouTube",
                                                subtitle = "standardized hourly earning by business types")
YouTube
standardized hourly earning by business types
business_type_level_1 mean_earn median_earn
Publication, Media & Blog 163.73000 70.00000
Investing 31.20833 26.25000
Service Business 76.22654 25.00000
Software & Tech 20.00000 20.00000
Ecommerce & Consumer 48.28748 16.77083


However, there is considerable variation in earnings across the different business types

yt_hourlycorrs_bus1 %>% group_by(yt_video_id,idea) %>% 
  distinct(yt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup %>% 
  ggplot()+
  geom_sina(aes(x=str_wrap(business_type_level_1,12),
                  y=hourly_earn,color=business_type_level_1))+
  scale_color_discrete(guide="none")+labs(x="Business Type (level 1)")

YouTube - Level 2

yt_bus2 <- youtube_data_activities  %>% group_by(yt_video_id,idea) %>%
  unchop(business_type_level_2) %>%  ungroup()
n_bus2 <- youtube_data_activities %>% distinct(business_type_level_2) %>% nrow()
maxnbus2 <- yt_bus2 %>% group_by(yt_video_id,idea) %>% 
    distinct(yt_video_id,idea,business_type_level_2) %>% ungroup() %>% group_by(yt_video_id,idea) %>% summarise(nbus2=n()) %>% arrange(-nbus2) %>% pull(nbus2) %>% max()

The second-level classification of Business Activities for making money includes many more categories (51). Many combinations of Business Types were possible for each video/idea, but none included more than 3.

At this level no particular business type predominated, none represented >20% of suggested activities. The most frequent business type was Publication, Media & Blog - Affiliate Marketing, followed by Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images). Other types were much less common.

bus2ct <- yt_bus2 %>% group_by(yt_video_id,idea) %>% 
  distinct(yt_video_id,idea,business_type_level_2) %>% 
  tabyl(business_type_level_2) %>% arrange(-n)

bus2ct %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% filter(n>1) %>% 
  gt() %>% tab_header("YouTube",subtitle = "Business Types (LEVEL 2), all videos") %>% tab_footnote(footnote = "n=1 not shown",
                           locations = cells_column_labels(
                             columns = n
                           )) 
YouTube
Business Types (LEVEL 2), all videos
business_type_level_2 n1 percent
Publication, Media & Blog - Affiliate Marketing 86 0.19
Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) 64 0.14
Publication, Media & Blog - YouTube 35 0.08
Service Business - Other freelance (e.g. on Upwork, Fiverr) 25 0.06
Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) 19 0.04
Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) 18 0.04
Publication, Media & Blog - Courses 17 0.04
Ecommerce & Consumer - Dropshipping 14 0.03
Ecommerce & Consumer - Online Shop 14 0.03
Investing - Crypto 13 0.03
Publication, Media & Blog - Write a blog 11 0.02
Ecommerce & Consumer - Amazon FBA 10 0.02
Investing - Stocks 10 0.02
Ecommerce & Consumer - Sell belongings on marketplaces (e.g. Ebay, Craigslist, Etsy) 9 0.02
Investing - Real estate investing (e.g. House flipping/ Crowdfunding) 8 0.02
Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) 8 0.02
Publication, Media & Blog - Content Creator 7 0.02
Publication, Media & Blog - Influencer 5 0.01
Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation 5 0.01
Service Business - Become a Virtual Assistant 5 0.01
Software & Tech - Create A Website 5 0.01
Service Business - Freelance Writer 4 0.01
Ecommerce & Consumer - Facebook Marketplace 3 0.01
Publication, Media & Blog - Paid Community (Patreon) 3 0.01
Publication, Media & Blog - Spinning Articles 3 0.01
Publication, Media & Blog - Youtube Sponsorships 3 0.01
Service Business - Rent out stuff (e.g. storage space, truck) 3 0.01
Publication, Media & Blog - Image Sharing 2 0.00
Publication, Media & Blog - Membership Sites 2 0.00
Publication, Media & Blog - Newsletter/ Articles 2 0.00
Publication, Media & Blog - NFT's 2 0.00
Publication, Media & Blog - Podcasting 2 0.00
Publication, Media & Blog - Write a book 2 0.00
Service Business - Home Delivery Services 2 0.00
Service Business - Home Services (e.g. Power Washing, Pet sitting) 2 0.00
Service Business - Video Editor 2 0.00
Software & Tech - Create a mobile app 2 0.00
Software & Tech - Create a software 2 0.00
Software & Tech - Create Templates 2 0.00
1 n=1 not shown

The Business type with the highest mean standardized earnings was Publication, Media & Blog - Newsletter/ Articles, followed by Ecommerce & Consumer - Dropshipping and other types in the Publication/Media/Blogging fields.

yt_hourlycorrs_bus2 <- yt_hourlycorrs  %>% group_by(yt_video_id,idea) %>%
unchop(business_type_level_2) %>%  ungroup()
yt_hourlycorrs_bus2 %>% group_by(yt_video_id,idea) %>% 
  distinct(yt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup() %>% 
  group_by(business_type_level_2) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-median_earn) %>% gt() %>% tab_header("YouTube, Business Type Level 2",
                                                subtitle = "standardized hourly earning by business types")
YouTube, Business Type Level 2
standardized hourly earning by business types
business_type_level_2 mean_earn median_earn
Publication, Media & Blog - Write a blog 925.00000 925.00000
Publication, Media & Blog - Newsletter/ Articles 312.50000 312.50000
Ecommerce & Consumer - Dropshipping 255.56820 255.56820
Publication, Media & Blog - Youtube Sponsorships 230.62500 230.62500
Publication, Media & Blog - Spinning Articles 718.33333 180.00000
Service Business - Sales Representative/Advisor 170.00000 170.00000
Publication, Media & Blog - Affiliate Marketing 179.31921 105.65000
Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation 105.00000 105.00000
Publication, Media & Blog - YouTube 118.58227 75.00000
Publication, Media & Blog - Membership Sites 70.00000 70.00000
Service Business - Rent out stuff (e.g. storage space, truck) 68.75000 68.75000
Investing - Crypto 62.50000 62.50000
Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) 58.42406 56.25000
Publication, Media & Blog - Content Creator 50.00000 50.00000
Software & Tech - Create A Website 47.50000 47.50000
Service Business - Home Services (e.g. Power Washing, Pet sitting) 45.62500 45.62500
Service Business - Other freelance (e.g. on Upwork, Fiverr) 264.55625 40.62500
Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) 47.08333 37.50000
Service Business - Become a Virtual Assistant 40.62500 35.00000
Investing - Real estate investing (e.g. House flipping/ Crowdfunding) 31.35417 31.35417
Service Business - Freelance Writer 40.50000 30.00000
Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) 113.01125 28.50000
Publication, Media & Blog - NFT's 26.25000 26.25000
Publication, Media & Blog - Creating Spotify Ads 25.00000 25.00000
Service Business - Home Delivery Services 22.41667 22.41667
Publication, Media & Blog - Courses 77.02381 20.00000
Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) 20.84028 16.77083
Service Business - Video Editor 15.00000 15.00000
Publication, Media & Blog - Paid Community (Patreon) 27.30518 10.00000
Service Business - Data Entry 10.00000 10.00000
Ecommerce & Consumer - Online Shop 9.09500 9.09500
Investing - Stocks 22.33333 4.00000

The five business types at this level with n>6 show a wide range of earnings.

yt_hourlycorrs_bus2 %>% group_by(yt_video_id,idea) %>% 
  distinct(yt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup %>% add_count(business_type_level_2) %>% filter(n>6) %>% 
  ggplot()+
  geom_sina(aes(x=str_wrap(business_type_level_2,33),
                  y=hourly_earn,color=business_type_level_2))+
  scale_color_discrete(guide="none")+labs(x="Business Type (level 1)")+
  coord_flip()+labs(x="Business Type Level 2")

TikTok - Level 1

For TikTok videos, the predominant Business Type for the money-making ideas was Service Business with almost 50% of videos, followed by the Ecommerce & Consumer ventures. Other business types were less common.

# tt without earnings
tt_acts_chp <- tiktok_data_activities  %>% group_by(tt_video_id,idea) %>%
  chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()

tt_bus1 <- tiktok_data_activities  %>% group_by(tt_video_id,idea) %>%
  unchop(business_type_level_1) %>%  ungroup()

tt_bus1ct <- tt_bus1 %>% group_by(tt_video_id,idea) %>% 
  distinct(tt_video_id,idea,business_type_level_1)  %>% 
  tabyl(business_type_level_1) %>% arrange(-n)


tt_bus1ct %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% 
  gt() %>% tab_header("TikTok",subtitle = "Business Types, all videos") 
TikTok
Business Types, all videos
business_type_level_1 n percent
Service Business 74 0.49
Ecommerce & Consumer 36 0.24
Publication, Media & Blog 28 0.19
Investing 12 0.08
Software & Tech 1 0.01

For videos and ideas with reported earnings, the business activity with the highest earnings (standardized) was Investing, followed by Ecommerce & Consumer

# tt with earninings
tt_hourlycorrs <- left_join(tt_hourly_earn,tiktok_data_activities)

tt_hourlycorrs_chp <- tt_hourlycorrs  %>% group_by(tt_video_id,idea) %>%
  chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()

tt_hourlycorrs_bus1 <- tt_hourlycorrs  %>% group_by(tt_video_id,idea) %>%
  unchop(business_type_level_1) %>%  ungroup()

tt_hourlycorrs_bus1 %>% group_by(tt_video_id,idea) %>% 
  distinct(tt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup() %>% 
  group_by(business_type_level_1) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-median_earn) %>% gt()
business_type_level_1 mean_earn median_earn
Ecommerce & Consumer 227.55849 134.90062
Investing 82.22282 56.11292
Service Business 102.44798 40.00000
Publication, Media & Blog 261.69231 37.50000
Software & Tech 1.48500 1.48500

However, with some exceptions, earnings do not vary considerably across the different business types

tt_hourlycorrs_bus1 %>% group_by(tt_video_id,idea) %>% 
  distinct(tt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup %>% 
  ggplot()+
  geom_jitter(aes(x=str_wrap(business_type_level_1,12),
                  y=hourly_earn,color=business_type_level_1))+
  scale_color_discrete(guide="none")+labs(x="Business Type (level 1)",subtitle = "TikTok")

# 1 saving plots in pdf with example

# ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color= drv)) + 
#      geom_smooth(mapping = aes(linetype = drv), method = 'loess') +
#      geom_point()
# 
# if(save == T){ 
#   ggsave(here::here("plots", "name_plot.pdf"), 
#          width = 12.5, height = 8, device = cairo_pdf)
# }

# 2 pdfs will then be converted into the pngs using the 04_convert_pdfs_to_pngs.rmd file. 

View counts, comments, followers, and standardized earnings are not tightly associated.

yt_hourly_renamed <-  yt_hourlycorrs_chp %>%  
  select(earn=hourly_earn,views=view_count,source,comments,followers=subs_numeric)
  
tt_hourly_renamed <-  tt_hourlycorrs_chp %>%  
  select(earn=hourly_earn,views=play_count,source,comments=comment_count,
           followers=author_meta_fans)

hourlyboth <- bind_rows(yt_hourly_renamed,tt_hourly_renamed)

ggplot(hourlyboth)+aes(x=views,y=earn,color=source)+geom_point()

ggplot(hourlyboth)+aes(x=comments,y=earn,color=source)+geom_point()

ggplot(hourlyboth)+aes(x=followers,y=earn,color=source)+geom_point()

TikTok - Level 2

n_bus2tt <- tiktok_data_activities %>% distinct(business_type_level_2) %>% nrow()

tt_bus2 <- tiktok_data_activities  %>% group_by(tt_video_id,idea) %>%
  unchop(business_type_level_2) %>%  ungroup()
maxnbus2tt <- tt_bus2 %>% group_by(tt_video_id,idea) %>% 
    distinct(tt_video_id,idea,business_type_level_2) %>% ungroup() %>% group_by(tt_video_id,idea) %>% summarise(nbus2=n()) %>% arrange(-nbus2) %>% pull(nbus2) %>% max()

TikTok videos mentioned fewer (33) Business Activities in this level than Youtube videos. Many combinations of Business Types were possible for each video/idea, but none included more than 3.

No particular business type predominated, none represented >20% of suggested activities. The most frequent business type was Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images), followed by Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy). Other types were much less common.

bus2ctt <- tt_bus2 %>% group_by(tt_video_id,idea) %>% 
  distinct(tt_video_id,idea,business_type_level_2) %>% 
  tabyl(business_type_level_2) %>% arrange(-n)

bus2ctt %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% filter(n>1) %>% 
  gt() %>% tab_header("TikTok",subtitle = "Business Types (LEVEL 2), all videos") %>% tab_footnote(footnote = "n=1 not shown",
                           locations = cells_column_labels(
                             columns = n
                           )) 
TikTok
Business Types (LEVEL 2), all videos
business_type_level_2 n1 percent
Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) 30 0.16
Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) 16 0.09
Ecommerce & Consumer - Dropshipping 14 0.08
Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) 10 0.05
Publication, Media & Blog - Affiliate Marketing 9 0.05
Service Business - Other freelance (e.g. on Upwork, Fiverr) 9 0.05
Ecommerce & Consumer - Amazon FBA 8 0.04
Investing - Real estate investing (e.g. House flipping/ Crowdfunding) 8 0.04
Ecommerce & Consumer - Online Shop 7 0.04
Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) 7 0.04
Publication, Media & Blog - YouTube 6 0.03
Service Business - Sales Representative/Advisor 5 0.03
Service Business - Vending Machine 5 0.03
Ecommerce & Consumer - Facebook Marketplace 4 0.02
Publication, Media & Blog - Image Sharing 4 0.02
Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation 4 0.02
Service Business - Become a Virtual Assistant 4 0.02
Service Business - Home Delivery Services 4 0.02
Service Business - Home Services (e.g. Power Washing, Pet sitting) 4 0.02
Ecommerce & Consumer - Sell belongings on marketplaces (e.g. Ebay, Craigslist, Etsy) 3 0.02
Investing - Stocks 3 0.02
Publication, Media & Blog - NFT's 3 0.02
Service Business - Freelance Writer 3 0.02
Service Business - Rent out room (e.g. Airbnb) 3 0.02
Service Business - Car Wash 2 0.01
Service Business - Furniture Flipping 2 0.01
Service Business - Rent out stuff (e.g. storage space, truck) 2 0.01
1 n=1 not shown

The Business type with the highest mean standardized earnings was Publication, Media & Blog - Write a blog (although this result is driven by a single video stating $300000 in monthly earnings through this method). After that Service Business - Furniture Flipping and Investing - Stocks have similar earnings, followed by various others.

tt_hourlycorrs_bus2 <- tt_hourlycorrs  %>% group_by(tt_video_id,idea) %>%
unchop(business_type_level_2) %>%  ungroup()

tt_hourlycorrs_bus2 %>% group_by(tt_video_id,idea) %>% 
  distinct(tt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup() %>% 
  group_by(business_type_level_2) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-median_earn) %>% gt() %>% tab_header("TikTok, Business Type Level 2",
                                                subtitle = "standardized hourly earning by business types")
TikTok, Business Type Level 2
standardized hourly earning by business types
business_type_level_2 mean_earn median_earn
Publication, Media & Blog - Write a blog 1500.0000000 1500.0000000
Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) 245.8878889 150.0000000
Service Business - Furniture Flipping 140.6250000 140.6250000
Investing - Real estate investing (e.g. House flipping/ Crowdfunding) 135.0000000 135.0000000
Ecommerce & Consumer - Amazon FBA 195.7437500 119.8012500
Ecommerce & Consumer - Online Shop 100.0000000 100.0000000
Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) 73.8194444 75.0000000
Service Business - Other freelance (e.g. on Upwork, Fiverr) 79.8809524 75.0000000
Service Business - Home Delivery Services 63.4843750 63.4843750
Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) 160.4275000 61.5000000
Investing - Stocks 55.8342361 55.8342361
Ecommerce & Consumer - Dropshipping 184.4657571 50.0000000
Service Business - Car Wash 50.0000000 50.0000000
Service Business - Home Services (e.g. Power Washing, Pet sitting) 45.0000000 40.0000000
Publication, Media & Blog - Affiliate Marketing 326.0000000 37.5000000
Publication, Media & Blog - YouTube 501.5000000 37.5000000
Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation 323.3333333 37.5000000
Service Business - Freelance Writer 68.0555556 30.0000000
Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) 45.8333333 25.0000000
Service Business - Become a Virtual Assistant 29.9950000 21.9850000
Service Business - Rent out room (e.g. Airbnb) 18.3175000 18.3175000
Service Business - Sales Representative/Advisor 18.7400000 18.0000000
Ecommerce & Consumer - Sell belongings on marketplaces (e.g. Ebay, Craigslist, Etsy) 9.1636667 9.1636667
Software & Tech - Create a software 1.4850000 1.4850000
Service Business - Vending Machine 0.6703125 0.6703125

The six business types at this level with n>5 also show a wide range of earnings.

tt_hourlycorrs_bus2 %>% group_by(tt_video_id,idea) %>% 
  distinct(tt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup %>% add_count(business_type_level_2) %>% filter(n>5) %>% 
  ggplot()+
  geom_sina(aes(x=str_wrap(business_type_level_2,33),
                  y=hourly_earn,color=business_type_level_2))+
  scale_color_discrete(guide="none")+labs(x="Business Type (level 1)")+
  coord_flip()+labs(x="Business Type Level 2")

Top earners

On YouTube, these three videos (from three different creators) report the highest earnings.

yt_hourlycorrs_chp %>% slice_max(hourly_earn,n = 3) %>% select(video_url,title,author,earnings_timeframe,earn,hourly_earn) %>% gt()
video_url title author earnings_timeframe earn hourly_earn
https://www.youtube.com/watch?v=h6C0Dq_wcJ0 FREE Bot Pays You $30.00 Per Minute in Passive Income [Make Money Online] Online Hustle Minutes 30 1800
https://www.youtube.com/watch?v=1_XD-J0u5E8 NEW $7000/Day Copy Paste Website Pays Beginners! (Make Money Online) Online Hustle Days 7000 875
https://www.youtube.com/watch?v=YDZ3M0Az8BU 7 Ways To Make Your First $100,000 Online Iman Gadzhi Months 150000 750

On TikTok, these three videos (from three different creators) report the highest earnings.

tt_hourlycorrs_chp %>% slice_max(hourly_earn,n = 3) %>% select(web_video_url,text,author_meta_nick_name,earnings_timeframe,earn,hourly_earn) %>% gt()
web_video_url text author_meta_nick_name earnings_timeframe earn hourly_earn
https://www.tiktok.com/@adamenfroy/video/7132196947120753963 The most underrated #sidehustle? #makemoneyonline #blogging #affiliatemarketing adamenfroy Months 300000 1500.0
https://www.tiktok.com/@lukeisshorts/video/7119011507559664942 Anybody can do it #investor #passiveincome #makemoneyfromhome Luke Robins Months 184500 922.5
https://www.tiktok.com/@ecomjoshcarter/video/6924254455114370305 I QUIT MY 9-5... #ebay #dropshipping #ecom #makemoney ecomjoshcarter Days 7000 875.0

In particular, author “Adam Enfroy” appears on both of these platforms reporting 100-300k USD in monthly earnings.

Bottom earners

yt_hourlycorrs_chp %>% slice_min(hourly_earn,n = 3) %>% select(video_url,title,author,earnings_timeframe,earn,hourly_earn) %>% gt()
video_url title author earnings_timeframe earn hourly_earn
https://www.youtube.com/watch?v=WGxKjgy7R4g Stupid App Gives $500 To Beginners Who DO NOTHING [Make Money Online] Online Hustle Months 15 0.0750000
https://www.youtube.com/watch?v=_bUsIgEFTTc 7.5 Passive Income Ideas To Easily Make $500/Day Vincent Chan Years 500 0.2083333
https://www.youtube.com/watch?v=N7LFPNYgMOI Best 10 Apps That Pay You Real Money | Make Money Online Mr Reis Months 55 0.2750000
tt_hourlycorrs_chp %>% slice_min(hourly_earn,n = 3) %>% select(web_video_url,text,author_meta_nick_name,earnings_timeframe,earn,hourly_earn) %>% gt()
web_video_url text author_meta_nick_name earnings_timeframe earn hourly_earn
https://www.tiktok.com/@pristinevending/video/7112240282468388138 How much did this machine make after 2 months? #vendingmachinebusiness #vendingmachine #sidehustle #sidehustleideas #drinkmachine PristineVending Months 18.375000 0.0918750
https://www.tiktok.com/@ebrahim_ka/video/6875602063842086146 5 ways to make money at home #howtomakemoney #makemoney #moneyathome #dubai Ebrahim Months 52.000000 0.2600000
https://www.tiktok.com/@mattlorion/video/6840985541836623109 Turning $0 into $100,000 (Part 1) #fyp #entrepreneur #makemoney #flipping Matt Lorion Days 2.618667 0.3273333

Skills

Different money-making ideas on both platforms varied in the number of skills needed to generate earnings. For the most part, each idea needed only one or two different skills, and this was more evident on TikTok (much shorter videos with generally only one money-making idea).

skills_per_idea_yt <- 
youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>% 
  group_by(yt_video_id,idea) %>%  distinct() %>% 
summarise(n_skills=n()) %>% ungroup() %>% mutate(source="YouTube") %>% 
    select(n_skills,source)

skills_per_idea_tt <- 
tiktok_data_activities %>% select(tt_video_id,idea,skills_required) %>% 
  group_by(tt_video_id,idea) %>%  distinct() %>% 
summarise(n_skills=n()) %>% ungroup()%>% mutate(source="TikTok") %>% 
  select(n_skills,source)

bind_rows(skills_per_idea_tt,skills_per_idea_yt) %>% 
ggplot()+
  geom_histogram(aes(x=n_skills,fill=source))

The makeup of required skills also varied across platforms.

On YouTube, the most mentioned skill was Marketing followed by Image or Video Editing.

youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>% distinct() %>% tabyl(skills_required) %>% arrange(-n) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% na.omit %>% gt() %>% tab_header(title = "YouTube")
YouTube
skills_required n percent
Marketing 152 0.24
Image Editing 83 0.13
Video Editing 81 0.13
Writing 70 0.11
Investing 69 0.11
Doing mircowork for businesses 67 0.11
Speaking 54 0.09
Web Development 29 0.05
Programming 22 0.03
Providing Home Services 3 0.00
Gaming 2 0.00

When an idea or video required two or more skills, the most common combinations were Speaking + Video Editing, Investing + Marketing, Speaking + Writing, and Image + Video Editing.

youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>% 
  group_by(yt_video_id,idea) %>% 
  arrange(yt_video_id, skills_required) %>% 
  summarize(combination = paste0(skills_required, collapse = " - "), .groups = "drop") %>% 
  count(combination) %>% 
  filter(str_detect(combination," - ")) %>%  arrange(-n) %>% slice(1:10) %>% 
  gt() %>% tab_header("YouTube Data", subtitle="Combinations of >2 skills, Top 10 most common combinations shown")
YouTube Data
Combinations of >2 skills, Top 10 most common combinations shown
combination n
Speaking - Video Editing 13
Investing - Marketing 12
Speaking - Writing 12
Image Editing - Video Editing 7
Image Editing - Marketing 6
Image Editing - Marketing - Programming - Video Editing - Web Development - Writing 6
Image Editing - Investing 5
Image Editing - Speaking - Video Editing 4
Image Editing - Speaking - Video Editing - Writing 4
Investing - Investing 4

TikTok videos favored Investing, followed by Marketing and Writing.

tiktok_data_activities %>% group_by(tt_video_id,idea) %>% tabyl(skills_required) %>% arrange(-n) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% na.omit %>% gt() %>% tab_header(title = "TikTok")
TikTok
skills_required n percent
Investing 73 0.26
Marketing 57 0.20
Writing 37 0.13
Image Editing 33 0.12
Doing mircowork for businesses 32 0.11
Video Editing 21 0.07
Providing Home Services 9 0.03
Speaking 8 0.03
Web Development 8 0.03
Programming 5 0.02
Gaming 1 0.00

Despite the fewer videos that required >1 skill, there were some frequently mentioned skill combinations such as Investing + Marketing, followed by Marketing+Programming+Video Editing+Web Development+Writing.

tiktok_data_activities %>% select(tt_video_id,idea,skills_required) %>% 
  group_by(tt_video_id,idea) %>% 
  arrange(tt_video_id, skills_required) %>% 
  summarize(combination = paste0(skills_required, collapse = " - "), .groups = "drop") %>% 
  count(combination) %>% 
  filter(str_detect(combination," - ")) %>%  arrange(-n) %>% slice(1:10) %>% 
  gt() %>% tab_header("TikTok Data", subtitle="Combinations of >2 skills, Top 10 most common combinations shown")
TikTok Data
Combinations of >2 skills, Top 10 most common combinations shown
combination n
Investing - Marketing 9
Marketing - Programming - Video Editing - Web Development - Writing 4
Doing mircowork for businesses - Doing mircowork for businesses - Writing - Writing 3
Doing mircowork for businesses - Speaking 3
Image Editing - Image Editing 3
Marketing - Marketing 3
Doing mircowork for businesses - Writing 2
Image Editing - Image Editing - Image Editing - Image Editing - Investing - Investing - Investing - Investing 2
Image Editing - Image Editing - Investing - Investing - Marketing - Marketing 2
Image Editing - Image Editing - Marketing - Marketing 2

Skills and earnings

Across both platforms, the Skill with the highest mean standardized earnings (hourly) was Investing, followed by Marketing, Video Editing, and Writing.

ttskillearn <- 
tt_hourlycorrs %>% select(tt_video_id,idea,hourly_earn,skills_required) %>% group_by(tt_video_id,idea) %>% distinct() %>% 
  group_by(skills_required) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-mean_earn) 

ytskillearn <- 
yt_hourlycorrs %>% select(yt_video_id,idea,hourly_earn,skills_required) %>% group_by(yt_video_id,idea) %>% distinct() %>% 
  group_by(skills_required) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-mean_earn)

ytskillearns <- 
yt_hourlycorrs %>% select(yt_video_id,idea,hourly_earn,skills_required) %>% group_by(yt_video_id,idea) %>% distinct() %>% 
  ungroup %>%  mutate(source="YouTube")

ttskillearns <-  
tt_hourlycorrs %>% select(tt_video_id,idea,hourly_earn,skills_required) %>% group_by(tt_video_id,idea) %>% distinct() %>% 
ungroup %>% mutate(source="TikTok")

bothearn <- bind_rows(ytskillearns,ttskillearns) %>% group_by(skills_required) %>% 
  summarise(mean_earn=mean(hourly_earn),
            median_earn=median(hourly_earn)) %>% 
  arrange(-mean_earn)

gt(bothearn) %>% tab_header(title='all platforms',subtitle = "earnings by skill")
all platforms
earnings by skill
skills_required mean_earn median_earn
Marketing 162.62975 62.50000
Investing 144.76703 55.55556
Video Editing 140.88650 50.00000
Doing mircowork for businesses 135.10391 46.50000
Writing 131.10958 25.99250
Speaking 100.22660 25.00000
Providing Home Services 40.89468 27.96875
Image Editing 40.24922 25.00000
Programming 38.74833 40.00000
Web Development 37.03615 37.50000
Gaming 36.00000 36.00000

By platform, differences in reported earnings appear. On YouTube, Marketing and Investing report the highest mean earnings. On TikTok, Video Editing and Investing report the highest earnings.

gt(ytskillearn)%>% tab_header(title='YouTube',subtitle = "earnings by skill")
YouTube
earnings by skill
skills_required mean_earn median_earn
Marketing 146.43900 62.50000
Writing 138.97708 25.00000
Doing mircowork for businesses 109.94519 45.00000
Investing 107.55416 40.00000
Video Editing 80.54883 52.05250
Speaking 47.41574 20.00000
Web Development 43.12500 38.75000
Programming 42.75000 40.00000
Image Editing 36.72155 21.04167
Providing Home Services 18.69444 19.83333
gt(ttskillearn) %>% tab_header(title='TikTok',subtitle = "earnings by skill")
TikTok
earnings by skill
skills_required mean_earn median_earn
Video Editing 364.99786 37.5000
Speaking 241.05556 237.5000
Marketing 205.80511 50.0000
Doing mircowork for businesses 170.85579 48.0000
Investing 169.57562 100.0000
Writing 121.66858 25.9925
Image Editing 55.06547 50.0000
Providing Home Services 51.99479 45.0000
Gaming 36.00000 36.0000
Programming 33.74625 33.4925
Web Development 27.29400 21.9850

Additionally, TikTok videos report significantly higher earnings for the same skills compared with YouTube videos.

ytskearnsummary <- ytskillearn %>% mutate(source="YouTube")
ttskearnsummary <- ttskillearn %>% mutate(source="TikTok")
bind_rows(ytskearnsummary,ttskearnsummary) %>% 
  ggplot()+
  geom_bar(aes(x=fct_reorder(skills_required,mean_earn),y=mean_earn,fill=source),stat="identity",position = "dodge")+coord_flip()+labs(x='skill')

The variation and spread of earnings by skill is consistent across platforms.

bind_rows(ytskillearns,ttskillearns) %>% 
  ggplot(aes(x=fct_reorder(skills_required,hourly_earn),y=hourly_earn,color=source))+
  geom_sina()+labs(x='skill')+
  coord_flip()

More required skills did not relate with greater earnings.

# skills_per_idea_ytid <- 
# youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>% 
#   group_by(yt_video_id,idea) %>%  distinct() %>% 
# summarise(n_skills=n()) %>% ungroup() %>% mutate(source="YouTube")  
# 
# skills_per_idea_ttid <- 
# tiktok_data_activities %>% select(tt_video_id,idea,skills_required) %>% 
#   group_by(tt_video_id,idea) %>%  distinct() %>% 
# summarise(n_skills=n()) %>% ungroup()%>% mutate(source="TikTok")  

skillsearntt <- left_join(skills_per_idea_tt,tt_hourly_earn)
skillsearnyt <- left_join(skills_per_idea_yt,yt_hourly_earn)
bind_rows(skillsearntt,skillsearnyt) %>% 
  ggplot(aes(x=factor(n_skills),hourly_earn))+geom_boxplot()+
  labs(x="number of skills required per money-making idea",y="standardized earnings")

Video titles

In general, the video titles vary considerably across platforms in terms of length, content and style.

tiktok_data <-  tiktok_data %>% mutate(title_noHash=str_extract(text,"^[^#]*")) 
yt_tlength <- round(mean(str_length(youtube_data$title)),0)
tt_tlength <-round(mean(str_length(tiktok_data$text)))
tt_tlength_nh <-round(mean(str_length(tiktok_data$title_noHash)))

Without various trailing hashtags, YouTube video titles are on average, twice as long as TikTok titles (65 vs. 31 characters). Overall, roughly a third of the length of TikTok titles comprises various hashtags.

The words and bigrams (consecutive sequences of two words) that appear most frequently in the video’s titles vary significantly between platforms.

# tokenize 
stopwords <- c("for","in","a","the","to","with","from","by")
title_words_yt <- youtube_data %>% unnest_tokens(title_wrd,title,token = "words") %>% 
  filter(!title_wrd %in% stopwords)
title_bigrams_yt <- youtube_data %>% unnest_tokens(title_bg,title,token = "ngrams",n=2) 
title_words_tt <- tiktok_data %>% unnest_tokens(title_wrd,text,token = "tweets") %>% 
  filter(!title_wrd %in% stopwords)
title_bigrams_tt <- title_words_tt %>% mutate(nextwrdbg=lead(title_wrd)) %>% 
  unite(title_bg, title_wrd, nextwrdbg, sep = ' ')


wordsyt <- title_words_yt %>% count(title_wrd) %>% slice_max(n,n=15) %>% mutate(source="YouTube")
wordstt <- title_words_tt %>% count(title_wrd) %>% slice_max(n,n=15) %>% mutate(source="TikTok")
bg_yt <- title_bigrams_yt %>% count(title_bg) %>% slice_max(n,n=15) %>% mutate(source="YouTube")
bg_tt <- title_bigrams_tt %>% count(title_bg) %>% slice_max(n,n=15) %>% mutate(source="TikTok")
top15wrds <- bind_rows(wordsyt,wordstt)
top15bg <- bind_rows(bg_yt,bg_tt)

ggplot(top15wrds)+
  geom_lollipop(aes(x=fct_reorder(title_wrd,n),y=n))+
  facet_wrap(~source)+labs(x="word or hashtag",y='occurrences')+
  coord_flip()

ggplot(top15bg)+
  geom_lollipop(aes(x=fct_reorder(title_bg,n),y=n))+
  facet_wrap(~source)+labs(x="bigram",y='occurrences')+
  coord_flip()

Considering the top 15 words or bigrams, there is little overlap between platforms.